home *** CD-ROM | disk | FTP | other *** search
- /*
- * field_index.c --
- * ITIID : $ITI$ $Header $__Header$
- * Author : Huynh Quoc T. Tung
- * Created On : Fri Mar 4 11:15:41 1994
- * Last Modified By: Ulrich Pfeifer
- * Last Modified On: Wed Sep 7 15:14:22 1994
- * Update Count : 114
- * Status : Unknown, Use with caution!
- */
-
- #include "cdialect.h"
- #include "futil.h"
- #include "field_index.h"
- #include "irhash.h" /* STOP_WORD_FLAG */
- #define MAX_FIELDS_NAMES 30
- #define MAX_WORD_LENGTH 20
- #define MAX_LINE_LENGTH 1000
-
- #ifdef STEM_WORDS
- boolean index_stemming = false; /* used in irtfiles.c */
- #endif
-
- static boolean not_field = false; /* set by index_line_section: true while the
- index kind being processed is I_GLOBAL (wordfunction1 is used) */
- static boolean is_field = false; /* set by index_line_section: true while the
- index kind being processed is not I_GLOBAL (wordfunction2 is used) */
- static boolean local_not_indexed = false; /* line not indexed; not referenced in the visible part of this file */
-
- fields_struct** index_fields = NULL; /* per-field descriptions; released by clear_fields_Ftable */
-
- nidx_table* Ntable = NULL; /* for lines which are not indexed;
- the original note says "defined in field_y.c" -- released by clear_ntable */
- ltable* Ltable = NULL; /* for layout; released by clear_Ltable */
-
- #ifdef SYSVREGEXP
- /* --------------------- sgrep function --------------------------- */
-
- #include "../regexp/regexp.h"
-
- static int number_of_expbuf = 0;
- regexp **expbuf_set;
-
- /* ------------------------------------------------------------- */
- /* compile regexp only one,
- regexp's compiled are in expbuf_set.
- return 0 if success,
- return -1 if error. */
-
-
-
- long compile_regexp (number_of_regexp, regexp_set)
- long number_of_regexp;
- char** regexp_set;
- {
- long i;
-
- number_of_expbuf = number_of_regexp;
-
- if(number_of_regexp == 0)
- return(0);
- expbuf_set = (regexp **)
- malloc(number_of_regexp*sizeof(regexp *));
- for(i=0; i< number_of_regexp; i++) {
- expbuf_set[i] = regcomp(regexp_set[i]);
- if (expbuf_set[i] == NULL) {
- return(-1);
- }
- }
- return(0);
- }
- /* ------------------------------------------------------------- */
-
- static char* sgrep _AP((char* s, regexp* expbuf, long* begin_pos, long* end_pos));
-
- static char* sgrep(s, expbuf, begin_pos, end_pos)
- char* s;
- regexp* expbuf;
- long* begin_pos;
- long* end_pos;
- {
- if (regexec(expbuf, s)) { /* match */
- if(begin_pos != NULL)
- *begin_pos = expbuf->startp[0] - s;
- if(end_pos != NULL)
- *end_pos = expbuf->endp[0] - s;
- return(expbuf->startp[0]);
- }
- return(NULL);
- }
- /* ------------------------------------------------------------- */
- /* matching line with regexp.
- regexp_pos is the position of compiled regexp.
- */
- char* match(line, begin_pos, end_pos, regexp_pos)
- char* line;
- long* begin_pos;
- long* end_pos;
- long regexp_pos;
- {
- return(sgrep(line, expbuf_set[regexp_pos],
- begin_pos, end_pos));
- }
- #else
- /* --------------------- sgrep function --------------------------- */
-
- #define INIT register char *regexp=instring; /* presumably the hook macros expected by the <regexp.h> included below -- TODO confirm */
- #define GETC() (*regexp++) /* read the next pattern character and advance */
- #define PEEKC() (*regexp) /* look at the next pattern character without advancing */
- #define UNGETC(c) (regexp--) /* step back one pattern character; the argument is ignored */
- #define RETURN(pointer) return(0); /* return 0 on success */
- #define ERROR(val) return((char *) val); /* return error code on failure */
- #ifndef SYSVREGEXP
- #ifndef HAVE_GETRNGE
- static getrnge _AP((char *string));
- #endif /* HAVE_GETRNGE */
- #endif /* SYSVREGEXP */
- #include <regexp.h>
-
- char** expbuf_set = NULL;
-
- /* ------------------------------------------------------------- */
- /* compile regexp only one,
- regexp's compiled are in expbuf_set.
- return 0 if success,
- return -1 if error. */
-
- static int* circf_set = NULL;
- static int number_of_expbuf = 0;
-
- long compile_regexp(number_of_regexp, regexp_set)
- long number_of_regexp;
- char** regexp_set;
- {
- long i;
- int status;
- char eof;
- char *c;
- char *endbuf;
-
- number_of_expbuf = number_of_regexp;
-
- if(number_of_regexp == 0)
- return(0);
-
- if(circf_set == NULL)
- circf_set = (int*)s_malloc((size_t)(sizeof(int) * number_of_regexp));
- expbuf_set =
- (char**)s_malloc((size_t)(sizeof(char*) * number_of_regexp));
- for(i=0; i< number_of_regexp; i++) {
- expbuf_set[i] = (char*)s_malloc((size_t)(sizeof(char) * 1024));
- endbuf = (expbuf_set[i])+1024; /* end of buffer */
- status = (int) compile(regexp_set[i], expbuf_set[i], endbuf, eof='\0');
- circf_set[i] = circf;
- if(status)
- return(-1);
- }
- return(0);
- }
- /* ------------------------------------------------------------- */
-
- static char* sgrep _AP((char* s, char* expbuf, int lcircf, long* begin_pos, long* end_pos));
- static char* sgrep(s, expbuf, lcircf, begin_pos, end_pos)
- char* s;
- char* expbuf;
- int lcircf;
- long* begin_pos;
- long* end_pos;
- {
- char *c;
-
- circf = lcircf;
- if (step(s, expbuf)) { /* match */
- if(begin_pos != NULL)
- *begin_pos = loc1 - s;
- if(end_pos != NULL)
- *end_pos = loc2 - s;
- return(loc1);
- }
- return(NULL);
- }
- /* ------------------------------------------------------------- */
- /* matching line with regexp.
- regexp_pos is the position of compiled regexp.
- */
- char* match(line, begin_pos, end_pos, regexp_pos)
- char* line;
- long* begin_pos;
- long* end_pos;
- long regexp_pos;
- {
- return(sgrep(line, expbuf_set[regexp_pos], circf_set[regexp_pos],
- begin_pos, end_pos));
- }
- #endif
- /* ------------------------------------------------------------- */
-
- static void clear_expbuf _AP((void));
- static void clear_expbuf()
- {
- long i;
- for(i=0; i< number_of_expbuf; i++)
- s_free(/* (char *) */(expbuf_set[i]));
- s_free(expbuf_set);
- }
-
- /* ------------------------------------------------------------- */
- static void clear_ntable _AP((void));
- static void clear_ntable()
- {
- long i;
-
- if(Ntable != NULL) {
- s_free(Ntable->begin_tag_pos);
- s_free(Ntable->end_tag_pos);
- s_free(Ntable);
- }
- }
- /* ------------------------------------------------------------- */
-
- static void clear_fields_Ftable _AP((database* db));
- static void clear_fields_Ftable(db)
- database* db;
- {
- long i, j, k;
-
- if(index_fields != NULL) {
- for(i=0; i<db->number_of_fields; i++) {
- j = index_fields[i]->number_of_Ftable;
- while(j > 0) {
- s_free(index_fields[i]->Ftable[j-1].index_kind);
- for(k=0; k < NUMBER_OF_INDEXTYPES; k++) {
- if(index_fields[i]->Ftable[j-1].indextypes[k] == NULL)
- break;
- else s_free(index_fields[i]->Ftable[j-1].indextypes[k]);
- }
- s_free(index_fields[i]->Ftable[j-1].indextypes);
- --j;
- }
- s_free(index_fields[i]->Ftable);
- if (index_fields[i]->field_name) s_free(index_fields[i]->field_name);
- s_free(index_fields[i]);
- }
- s_free(index_fields);
- }
- db->index_fields = NULL;
- return; /* return(0) (up) */
- }
- /* ------------------------------------------------------------- */
-
- static void clear_Ltable _AP((void));
- static void clear_Ltable()
- {
- long i;
-
- if(Ltable != NULL) {
- s_free(Ltable->layout);
- if(Ltable->date_desc != NULL)
- s_free(Ltable->date_desc);
- if(Ltable->sscanf_arg != NULL)
- s_free(Ltable->sscanf_arg);
- s_free(Ltable);
- }
- return; /* return(0); (up) */
- }
-
- /* ------------------------------------------------------------- */
-
- void clear_fields(db)
- database* db;
- {
- clear_fields_Ftable(db);
- clear_Ftable_pos();
- clear_ntable();
- clear_Ltable();
- clear_expbuf();
- return; /* return(0); (up) */
- }
-
- /* ------------------------------------------------------------- */
- static index_fields_struct* index_fields_array;
- long Maxfield_names = 0;
- long Nfield_names = 0;
-
- static void clear_index_fields_array _AP((long number_of_elements));
- static void clear_index_fields_array(number_of_elements)
- long number_of_elements;
- {
- long i;
-
- if(index_fields_array != NULL) {
- for(i=0; i<number_of_elements; i++) {
- s_free(index_fields_array->field_names[i]);
- }
- s_free(index_fields_array->numeric);
- /* should'nt we free this too ?(up) */
- s_free(index_fields_array->field_names);
- s_free(index_fields_array);
- }
- Maxfield_names = 0;
- Nfield_names = 0;
- }
- /* ------------------------------------------------------------- */
-
-
- /* store the name and field_id of fields to generate */
-
- long save_index_fields(field_name, number_of_elements, field_id)
- char* field_name;
- long* number_of_elements;
- long *field_id;
- {
- long i = 0;
- long len;
-
- if(index_fields_array != NULL) {
- for(i=0; i<*number_of_elements; i++) {
- if(field_name == NULL) {
- if(index_fields_array->field_names[i] == NULL) {
- *field_id = i;
- return(0);
- }
- }
- else {
- if(index_fields_array->field_names[i] != NULL)
- if(!strcmp(field_name, index_fields_array->field_names[i])) {
- *field_id = i;
- return(0);
- }
- }
- }
- if(Nfield_names >= Maxfield_names) {
- Maxfield_names += MAX_FIELDS_NAMES;
- index_fields_array->field_names =
- (char**)s_realloc(index_fields_array->field_names,
- (size_t)(sizeof(char*) * Maxfield_names));
- index_fields_array->numeric =
- (boolean*)s_realloc(index_fields_array->numeric,
- (size_t)(sizeof(boolean) * Maxfield_names));
- }
- ++Nfield_names;
- *field_id = i;
- *number_of_elements += 1;
- index_fields_array->numeric[i] = false;
- if(field_name != NULL) {
- len = strlen(field_name);
- index_fields_array->field_names[i] =
- (char*)s_malloc((size_t)(sizeof(char) * (len + 2)));
- s_strncpy(index_fields_array->field_names[i], field_name, len + 1);
- }
- else index_fields_array->field_names[i] = NULL;
- }
- else {
- index_fields_array = (index_fields_struct*)
- s_malloc((size_t)(sizeof(index_fields_struct)));
- Maxfield_names = Nfield_names + MAX_FIELDS_NAMES;
- index_fields_array->field_names = /* nobody frees this? (up) */
- (char**)s_malloc((size_t)(sizeof(char*) * Maxfield_names));
- index_fields_array->numeric =
- (boolean*)s_malloc((size_t)(sizeof(boolean) * Maxfield_names));
- if(field_name != NULL) {
- len = strlen(field_name);
- index_fields_array->field_names[i] =
- (char*)s_malloc((size_t)(sizeof(char) * (len + 2)));
- s_strncpy(index_fields_array->field_names[i],field_name, len + 1);
- }
- else index_fields_array->field_names[i] = NULL;
- index_fields_array->numeric[i] = false;
- ++Nfield_names;
- *field_id = 0;
- *number_of_elements += 1;
- }
- return(1);
- }
- /* ------------------------------------------------------------- */
-
- void save_numeric_index_fields(field_id)
- long field_id;
- {
- index_fields_array->numeric[field_id] = true;
- }
- /* ------------------------------------------------------------- */
-
static long* Ftable_pos = NULL;

/* Free the table of sections whose end tag has not been seen yet.
   The pointer is reset to NULL because how_index_line allocates the table
   only when it finds Ftable_pos == NULL; a stale pointer would be written
   through after the free. */
void clear_Ftable_pos()
{
  if(Ftable_pos != NULL) {
    s_free(Ftable_pos);
    Ftable_pos = NULL;
  }
}
-
-
- /* ------------------------------------------------------------- */
- /* how_index_line: check how a line should be indexed for field field_id
- and pass the selected text to index_line_section.
- The line is first copied into two local buffers of MAX_LINE_LENGTH
- (tmpline is searched, indexline is what gets indexed).
- If sections opened on earlier lines are still pending
- (*number_of_not_ended_section > 0), the end tag of every pending Ftable
- entry recorded in Ftable_pos is searched first.
- Afterwards every Ftable entry of the field is tried: the text in front
- of the begin tag is blanked out, an optional index position and the end
- tag bound the text, and entries whose end tag is missing on this line
- are remembered in Ftable_pos for the following lines.
- A -1 result of index_line_section is only logged.
- Always returns 0.
- */
-
- long how_index_line(field_id, line,
- number_of_not_ended_section,
- document_id,
- weight,
- file_position_before_line,
- line_length,
- newline_terminated,
- db,
- wordfunction1,
- wordfunction2,
- word_position, word_pairs,
- minwordlen, type)
- long field_id;
- char* line;
- long* number_of_not_ended_section;
- long document_id;
- long weight;
- long file_position_before_line;
- long *line_length;
- boolean *newline_terminated;
- database* db;
- wordfunc* wordfunction1;
- wordfunc* wordfunction2;
- boolean word_position, word_pairs;
- int minwordlen;
- char* type;
- {
- long i, fpos, tmplen;
- long char_count;
- long lnumber_of_not_ended_section;
- long begin_pos; /* Offset (from the start of the searched string) of the
- * first character that matched
- * the regular expression.
- */
- long end_pos; /* Offset (from the start of the searched string) of
- * the character after the
- * last character that matches
- * the regular expression.
- */
- long index_pos, n_index_pos;
- long rest;
- char* matchline = NULL;
- char tmpline[MAX_LINE_LENGTH];
- char indexline[MAX_LINE_LENGTH];
-
- s_strncpy(tmpline, line, MAX_LINE_LENGTH);
- tmplen = strlen(tmpline);
- /* if(tmpline[tmplen - 1] == '\n')
- tmpline[tmplen - 1] = '\0'; */
- s_strncpy(indexline, line, MAX_LINE_LENGTH);
-
- /* If the end marker of a section opened on a previous line has not
- been found yet, continue to search for that end marker.
- */
- if(*number_of_not_ended_section > 0) {
- lnumber_of_not_ended_section = *number_of_not_ended_section;
- for(i=0; i<*number_of_not_ended_section; i++) {
- if(Ftable_pos[i] > -1) { /* -1 marks an entry whose end tag was already found */
- fpos = Ftable_pos[i];
- if(match(tmpline,&end_pos,NULL, /* end_pos receives the START offset of the end tag here */
- db->index_fields[field_id]->Ftable[fpos].end_tag_pos)) {
- if(end_pos > 0) /* copy n-character until end_pos */
- s_strncpy(indexline, tmpline, end_pos + 1);
- else indexline[0] = '\0'; /* if the end_marke at begin of line then not index this line */
- Ftable_pos[i] = -1;
- --lnumber_of_not_ended_section;
- }
- /* end_marke not yet found */
- else s_strncpy(indexline, tmpline, MAX_LINE_LENGTH);
- }
- else --lnumber_of_not_ended_section;
- if(indexline[0] != '\0') {
- if(-1 == index_line_section(field_id, fpos, /* NOTE(review): fpos is unset when the first pending entry is already -1 -- confirm */
- indexline,
- document_id,
- weight,
- file_position_before_line,
- line_length,
- newline_terminated,
- db,
- wordfunction1,
- wordfunction2,
- word_position, word_pairs,
- minwordlen, type))
- waislog(WLOG_HIGH, WLOG_ERROR, "map_over_words failed");
- }
- }
- if(lnumber_of_not_ended_section == 0) /* every pending section is closed: start counting from 0 again */
- *number_of_not_ended_section = 0;
- }
-
- for(i=0; i < db->index_fields[field_id]->number_of_Ftable; i++) { /* check for field_name */
- if((matchline = s_strdup(match(tmpline, &begin_pos, &end_pos,
- db->index_fields[field_id]->Ftable[i].begin_tag_pos)))) {
- /* match field */
- for(char_count=0; char_count<begin_pos; char_count++)
- indexline[char_count] = ' ';
- s_strncpy(&indexline[begin_pos], matchline, MAX_LINE_LENGTH - begin_pos);
-
- /* The index position is given.
- The line should be indexed exactly at index_pos.
- */
- if(db->index_fields[field_id]->Ftable[i].index_pos > -1) {
- if(matchline != NULL) s_free(matchline); /* NOTE(review): matchline is tested and freed again further down -- relies on s_free resetting it, TODO confirm */
- if(match(tmpline, NULL, &index_pos,
- db->index_fields[field_id]->Ftable[i].index_pos)) {
- for(char_count=0; char_count<index_pos - 1; char_count++)
- indexline[char_count] = ' ';
- if(match(&tmpline[index_pos-1], &end_pos, NULL, db->index_fields[field_id]->Ftable[i].end_tag_pos))
- s_strncpy(&indexline[index_pos-1], &tmpline[index_pos-1],end_pos + 1);
- else {
- s_strncpy(&indexline[index_pos-1], &tmpline[index_pos-1], MAX_LINE_LENGTH - index_pos - 1);
- if(Ftable_pos == NULL) /* first pending section: table starts with room for 10 entries */
- Ftable_pos = (long*)s_malloc((size_t)sizeof(long) * 10);
- else {
- if(*number_of_not_ended_section >= 10) {
- Ftable_pos = (long*) s_realloc(Ftable_pos,
- (size_t)sizeof(long) * (*number_of_not_ended_section + 10));
- }
- }
- Ftable_pos[*number_of_not_ended_section] = i; /* remember the open section for the following lines */
- ++(*number_of_not_ended_section);
- }
- }
- }
- else {
- for(char_count=0; char_count<begin_pos; char_count++)
- indexline[char_count] = ' ';
- rest = end_pos - begin_pos; /* length of the matched begin tag */
- if(match(&tmpline[end_pos], &end_pos, NULL, db->index_fields[field_id]->Ftable[i].end_tag_pos))
- s_strncpy(&indexline[begin_pos], matchline, end_pos + rest + 1);
- else {
- if(Ftable_pos == NULL) /* first pending section: table starts with room for 10 entries */
- Ftable_pos = (long*)s_malloc((size_t)sizeof(long) * 10);
- else {
- if(*number_of_not_ended_section >= 10) {
- Ftable_pos = (long*) s_realloc(Ftable_pos,
- (size_t)sizeof(long) * (*number_of_not_ended_section + 10));
- }
- }
- Ftable_pos[*number_of_not_ended_section] = i; /* remember the open section for the following lines */
- ++(*number_of_not_ended_section);
- }
- }
-
- /* It is a numeric field. */
-
- if(db->index_fields[field_id]->numeric) {
-
- if(db->index_fields[field_id]->n_index_pos > -1) {
-
- /* Indexing line at n_index_pos,
- or at index_pos or at beginning of line
- */
- if(match(tmpline, NULL, &n_index_pos,
- db->index_fields[field_id]->n_index_pos)) {
- for(char_count=0; char_count<n_index_pos - 1; char_count++)
- indexline[char_count] = ' ';
- if(db->index_fields[field_id]->numeric_len > 0) {
- s_strncpy(&indexline[n_index_pos-1], &tmpline[n_index_pos-1],
- db->index_fields[field_id]->numeric_len + 1);
- } else {
- if(end_pos > 0) {
- s_strncpy(&indexline[n_index_pos-1], &tmpline[n_index_pos-1],end_pos + 1);
- } else {
- s_strncpy(&indexline[n_index_pos-1], &tmpline[n_index_pos-1],
- MAX_LINE_LENGTH - n_index_pos);
- }
- }
- }
- }
- else {
- if(db->index_fields[field_id]->numeric_len > 0) {
- indexline[db->index_fields[field_id]->numeric_len] = '\0'; /* cut the text after numeric_len characters */
- }
- }
- }
-
- if(-1 == index_line_section(field_id, i,
- indexline,
- document_id,
- weight,
- file_position_before_line,
- line_length,
- newline_terminated,
- db,
- wordfunction1,
- wordfunction2,
- word_position, word_pairs,
- minwordlen, type))
- waislog(WLOG_HIGH, WLOG_ERROR, "map_over_words failed");
-
- if(matchline != NULL) s_free(matchline);
- }
- }
- if(matchline != NULL) s_free(matchline);
- return(0);
- }
- /* ------------------------------------------------------------- */
-
/* Count the words of a line that is not to be indexed.

   line:               NUL terminated text of the line.
   line_length:        if not NULL, receives the number of characters
                       of line (without the terminating NUL).
   newline_terminated: if not NULL, receives true if the line terminates
                       with a newline, else false.  An empty line is
                       reported as not newline terminated; the character
                       in front of the buffer is never inspected.

   A word is a run of at least two alphanumeric characters that is
   followed by a non-alphanumeric character.
   NOTE(review): a run that reaches the terminating NUL is not counted;
   confirm that this is intended for lines without a trailing newline.

   Returns the number of words. */

long count_words(line, line_length, newline_terminated)
char* line;
long* line_length;
boolean* newline_terminated;
{
  long char_count = 0;          /* index of the character being looked at */
  long word_len = 0;
  long word_count = 0;
  int ch;

  for(ch = (unsigned char)line[char_count];
      ch != '\0'; ch = (unsigned char)line[++char_count]){

    if(isalnum(ch))
      word_len++;
    else {
      if(word_len >= 2)
	word_count++;
      word_len = 0;
    }
  }
  /* char_count is now the length of the line */
  if(newline_terminated != NULL){
    if(char_count > 0 && '\n' == line[char_count-1])
      *newline_terminated = true;
    else
      *newline_terminated = false;
  }
  if(line_length != NULL)
    *line_length = char_count;
  return(word_count);
}
- /* ------------------------------------------------------------- */
-
- /* The line should be indexed as in the field_description file */
-
- long index_line_section(field_id, fpos,
- line,
- document_id,
- weight,
- file_position_before_line,
- line_length,
- newline_terminated,
- db,
- wordfunction1,
- wordfunction2,
- word_position, word_pairs,
- minwordlen, type)
- long field_id;
- long fpos;
- char* line;
- long document_id;
- long weight;
- long file_position_before_line;
- long *line_length;
- boolean *newline_terminated;
- database* db;
- wordfunc* wordfunction1;
- wordfunc* wordfunction2;
- boolean word_position, word_pairs;
- int minwordlen;
- char* type;
- {
- long i = 0;
- long number_of_words = 0;
-
- char** indextypes = db->index_fields[field_id]->Ftable[fpos].indextypes;
- int* index_kind = db->index_fields[field_id]->Ftable[fpos].index_kind;
-
- while(indextypes[i]) {
- if(!strncmp(indextypes[i],TEXT,strlen(TEXT)))
- type = NULL;
- else {
- type = indextypes[i];
- }
- if(index_kind[i] == I_GLOBAL) {
- not_field = true;
- is_field = false;
- }
- else {
- not_field = false;
- is_field = true;
- }
-
- if(not_field) {
- #ifdef STEM_WORDS
- if(db->stemming)
- index_stemming = true;
- else index_stemming = false;
- #endif
- number_of_words = map_over_words(line, document_id,
- weight,
- file_position_before_line,
- line_length,
- newline_terminated,
- db,
- wordfunction1,
- word_position, word_pairs,
- #ifdef SOUND
- minwordlen, type);
- #else
- minwordlen);
- #endif
- #ifdef STEM_WORDS
- index_stemming = false;
- #endif
- }
- else { /* is_field */
- #ifdef STEM_WORDS
- if(db->fields[field_id].stemming)
- index_stemming = true; /* used in map_over_words of irtfiles.c */
- else index_stemming = false;
- #endif
- number_of_words = map_over_words(line, document_id,
- weight,
- file_position_before_line,
- line_length,
- newline_terminated,
- db,
- wordfunction2,
- word_position, word_pairs,
- #ifdef SOUND
- minwordlen, type);
- #else
- minwordlen);
- #endif
- #ifdef STEM_WORDS
- index_stemming = false;
- #endif
- }
- ++i;
- }
- if(is_field)
- db->fields[field_id].total_word_count += number_of_words;
- return(number_of_words);
- }
- /* ------------------------------------------------------------- */
-
static char *rmprefix = NULL;   /* prefix rmselector compares against */

/* Selection callback handed to scandir by prefremove:
   returns 1 when the name of the directory entry starts with rmprefix,
   0 otherwise.  rmprefix must have been set before. */
static int rmselector(file)
struct dirent *file;
{
  size_t prefix_len = strlen(rmprefix);

  if (strncmp(file->d_name, rmprefix, prefix_len) == 0)
    return(1);
  return(0);
}
-
- static int prefremove(dir, prefix)
- char *dir, *prefix;
- {
- struct dirent **matches;
- char path[MAX_FILENAME_LEN];
- int i;
-
- rmprefix = prefix;
- strcpy(path,dir);
- strncat(path,"/",MAX_FILENAME_LEN);
-
- if ( scandir(dir, &matches, rmselector, NULL) > 0 ) {
- for(i=0;matches[i];i++) {
- path[strlen(dir)+1] = '\0';
- strncat(path,matches[i]->d_name,MAX_FILENAME_LEN);
- s_free(matches[i]);
- waislog(WLOG_LOW, WLOG_INFO, "deleting \"%s\"", path);
- if (unlink(path)) {
- waislog(WLOG_HIGH, WLOG_ERROR, "unlink failed");
- }
- }
- s_free(matches);
- }
- return(i);
- }
-
-
- /* insert all fields to create */
- boolean init_index_fields(adding_to_existing_index, create_new_fields,
- field_adding_to_existing_index, number_of_fields, db)
- boolean* adding_to_existing_index;
- boolean* create_new_fields;
- boolean field_adding_to_existing_index;
- long number_of_fields;
- database* db;
- {
- long i, j;
- long fno_len, fna_len, em_len, idx_len, nidx_flen, nidx_elen;
- long number_of_fields_not_exists = 0;
- char file[1001];
- /* char* system_call; */
- char* field_name;
-
- if(number_of_fields == 0)
- return(0);
-
- db->fields =
- (field_db*)s_malloc((size_t)(sizeof(field_db) * number_of_fields));
- if(db->field_index_streams == NULL)
- db->field_index_streams = (FILE**)s_malloc((size_t)sizeof(FILE*));
- if(db->field_dictionary_streams == NULL)
- db->field_dictionary_streams = (FILE**)s_malloc((size_t)sizeof(FILE*));
-
- if(*create_new_fields && !field_adding_to_existing_index) {
- field_name = (char*)s_malloc((size_t)(sizeof(char) * (MAX_FILENAME_LEN + 1)));
- db->number_of_fields = 0;
- }
-
- for(i= 0; i< number_of_fields; i++) {
- /* only fields which not exist will be created */
- if(*create_new_fields && !field_adding_to_existing_index) {
- if(index_fields_array->field_names[i] != NULL) {
- s_strncpy(field_name, db->database_file, MAX_FILENAME_LEN);
- s_strncat(field_name,field_ext,MAX_FILENAME_LEN,MAX_FILENAME_LEN);
- s_strncat(field_name,index_fields_array->field_names[i],
- MAX_FILENAME_LEN,MAX_FILENAME_LEN);
- s_strncat(field_name,dictionary_ext,MAX_FILENAME_LEN,MAX_FILENAME_LEN);
- if(!probe_file(field_name)) {
- db->fields[number_of_fields_not_exists].field_id = i;
- db->fields[number_of_fields_not_exists].index_file_number = 0;
- db->fields[number_of_fields_not_exists].total_word_count = 0;
- db->fields[number_of_fields_not_exists].numeric = index_fields_array->numeric[i];
- db->fields[number_of_fields_not_exists].stemming = index_fields[i]->stemming;
- fna_len = strlen(index_fields_array->field_names[i]);
- db->fields[number_of_fields_not_exists].field_name =
- (char*)s_malloc((size_t)(sizeof(char) * (fna_len + 2)));
- s_strncpy(db->fields[number_of_fields_not_exists].field_name,
- index_fields_array->field_names[i], fna_len + 1);
- ++number_of_fields_not_exists;
- }
- }
- db->number_of_fields += number_of_fields_not_exists;
- }
- else { /* insert all field names in database */
- db->fields[i].field_id = i;
- db->fields[i].index_file_number = 0;
- db->fields[i].total_word_count = 0;
- if(index_fields_array->field_names[i] != NULL) {
- fna_len = strlen(index_fields_array->field_names[i]);
- db->fields[i].field_name =
- (char*)s_malloc((size_t)(sizeof(char) * (fna_len + 2)));
- s_strncpy(db->fields[i].field_name,index_fields_array->field_names[i],
- fna_len + 1);
- }
- else db->fields[i].field_name = NULL;
- db->fields[i].numeric = index_fields_array->numeric[i];
- db->fields[i].stemming = index_fields[i]->stemming;
- }
- }
- clear_index_fields_array(number_of_fields);
-
- /* delete all fields exist */
-
- if(!*adding_to_existing_index) {
- if(!*create_new_fields) {
- char dir[MAX_FILENAME_LEN];
- char *prefix;
- int files_deleted;
-
- strcpy(dir, db->database_file);
- strncat(dir, field_ext,MAX_FILENAME_LEN);
- for(prefix=dir+strlen(dir);prefix>=dir && *prefix!='/';*prefix--);
- if (*prefix = '/') { /* path given */
- *(prefix++) = '\0';
- files_deleted = prefremove(dir, prefix);
- } else { /* database in current dir */
- files_deleted = prefremove("./", dir);
- }
- /*
- system_call = (char*)s_malloc((size_t)(sizeof(char) * (1000 + 3)));
- strncpy(system_call, "rm ", MAX_FILENAME_LEN + 3);
- s_strncat(system_call, db->database_file,
- MAX_FILENAME_LEN, MAX_FILENAME_LEN);
- s_strncat(system_call, field_ext, MAX_FILENAME_LEN, MAX_FILENAME_LEN);
- s_strncat(system_call, "*", MAX_FILENAME_LEN, MAX_FILENAME_LEN);
- s_strncat(system_call, dictionary_ext,
- MAX_FILENAME_LEN, MAX_FILENAME_LEN);
- system(system_call);
- s_free(system_call);
- */
- }
- }
- /* insert only new fields, old fields not deleted and
- * adding new words in global dictionary
- */
- if(*create_new_fields && field_adding_to_existing_index) {
- *create_new_fields = false;
- }
- /* insert only new fields, old fields not deleted and
- * not updates global fields.
- */
- else if(*create_new_fields && !field_adding_to_existing_index) {
- db->doc_table_allocated_entries = 1;
- s_free(field_name);
- }
- return(0);
- }
-
- /* ------------------------------------------------------------- */
- /* open stream for each field.
- * return 0, success.
- * return 1, error
- */
-
- boolean open_field_streams_init(initialize, field_adding_to_existing_index, field_id, db)
- boolean initialize;
- boolean field_adding_to_existing_index;
- long field_id;
- database* db;
- {
- char file[1001];
-
- if(db->field_index_streams == NULL)
- db->field_index_streams = (FILE**)s_malloc((size_t)sizeof(FILE*));
- if(db->field_dictionary_streams == NULL)
- db->field_dictionary_streams = (FILE**)s_malloc((size_t)sizeof(FILE*));
-
- /* ext_open_database for fields */
- if(initialize) {
- *(db->field_index_streams) = NULL;
- *(db->field_index_streams) =
- s_fopen(field_index_filename(file, db->fields[field_id].field_name, db),
- "w+b");
- if(*(db->field_index_streams) == NULL) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "2can't open the inverted index file of field %s\n",
- db->fields[field_id].field_name);
- disposeDatabase(db);
- return(1);
- }
- }
- else {
- *(db->field_dictionary_streams) =
- s_fopen(field_dictionary_filename(file,
- db->fields[field_id].field_name, db),
- "r+b");
- if (*(db->field_dictionary_streams) == NULL){
- if(!field_adding_to_existing_index) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "can't open the word hash file %s\n",file);
- disposeDatabase(db);
- return(1);
- }
- }
- *(db->field_index_streams) =
- s_fopen(field_index_filename(file, db->fields[field_id].field_name, db),
- "r+b");
- if(*(db->field_index_streams) == NULL) {
- if(field_adding_to_existing_index) {
- *(db->field_index_streams) =
- s_fopen(field_index_filename(file,db->fields[field_id].field_name,db),
- "w+b");
- if(*(db->field_index_streams) == NULL) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "2can't open the inverted index file of field %s\n",
- db->fields[field_id].field_name);
- disposeDatabase(db);
- return(1);
- }
- }
- else {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "2can't open the inverted index file of field %s\n",
- db->fields[field_id].field_name);
- disposeDatabase(db);
- return(1);
- }
- }
- }
- return(0);
- }
-
- boolean open_global_Database(initialize, db)
- boolean initialize;
- database* db;
- {
- char file[MAX_FILE_NAME_LEN + 1 ];
-
- if(initialize == false) {
- db->dictionary_stream = s_fopen(dictionary_filename(file, db), "r+b");
- if (db->dictionary_stream == NULL) {
- waislog(WLOG_HIGH,WLOG_ERROR,"can't open the word hash file %s\n",file);
- disposeDatabase(db);
- return(false);
- }
- }
- return(ext_open_database(db, initialize, false));
- }
-